# Figure_A04.R

# Part of the replication archive for 
#
#   Bullock, John G. 2020. "Education and Attitudes toward Redistribution in
#   the United States." British Journal of Political Science 50.


# This file produces Figure A4 in the appendix to the article: "Effects of 
# education on redistribution-related attitudes (OLS and 2SLS estimates)." If   
# you change the OTHER_PANELS argument in the code block immediately below, 
# the file will instead produce other appendix figures.



if (interactive()) {
  TSLS_ESTIMATES_ON_BOTTOM <- TRUE   # Main estimates on bottom, "OTHER_PANELS" results on top
  OTHER_PANELS             <- 'OLS'  # "2SLS_OVER_34", "HSGRAD", "OLS", "IMPUTED", "SOUTH", "WHITES_ONLY", "WHITE_SOUTHERNERS"
  STANDARDIZE              <- FALSE  # Standardize the outcomes?
  WHITES_ONLY              <- FALSE  # Analyze data from whites alone -- for both the "main" and the "other" panels?
} else {
  clArgs <- commandArgs(trailingOnly = TRUE)
  if (length(clArgs) <= 0) { 
    writeLines(paste0("clArgs has length ", length(clArgs), ".  This length is less than 1, so the script is stopping."))
    stop()
  }
  TSLS_ESTIMATES_ON_BOTTOM <- as.logical(clArgs[1])
  OTHER_PANELS             <- clArgs[2]
  STANDARDIZE              <- as.logical(clArgs[3])
  WHITES_ONLY              <- as.logical(clArgs[4])
}
library(abind)
library(Amelia)        # for mi.meld()
library(Bullock, lib.loc = c(.libPaths(), 'packageLibrary'))       # for qw()
library(dplyr)         # for case_when(), rename()
library(grid)
library(Hmisc)         # for Dotplot()
library(ivpack)        # for cluster.robust.se()
library(lattice)
library(multiwayvcov)  # for cluster.vcov()

if (OTHER_PANELS == 'IMPUTED' & Sys.info()['sysname']=='Windows') memory.size(32 * 1024)
dirOutput <- 'float_output/'  
source('IV_setup.R')
source('functions/estimateModels.R')
source('functions/estTable.R')
source("functions/getClusters.R")
source('float_code/dotplotParameters.R')

varNames     <- qw('eqwlth goveqinc guarantee.7pt govt.health.7pt helppoor welfare')
dfNames      <- qw('GSS.df GSS.df   ANES.df       ANES.df         GSS.df   GSS.df') 
PDF_title    <- 'Effects of education on economic attitudes' 

filenameStem <- case_when(
  OTHER_PANELS == 'OLS'          ~ 'Figure_A04',
  OTHER_PANELS == '2SLS_OVER_34' ~ 'Figure_A07',
  OTHER_PANELS == 'SOUTH'        ~ 'Figure_A10',
  OTHER_PANELS == 'IMPUTED'      ~ 'Figure_A13',
  OTHER_PANELS == 'WHITES_ONLY'  ~ 'Figure_A17',
  OTHER_PANELS == 'HSGRAD'       ~ 'Figure_A18',
  TRUE                           ~ 'FIGURE_UNKNOWN')



##############################################################################
# CREATE MODEL ENVIRONMENTS 
##############################################################################
if (OTHER_PANELS %in% c('2SLS_OVER_34')) {
  IVModelsEnvOther  <- as.environment(as.list(IVModelsEnv))
  IVModelsEnvOver34 <- IVModelsEnvOther
}

if (WHITES_ONLY || OTHER_PANELS %IN% qw('WHITES_ONLY WHITE_SOUTHERNERS')) {
  IVModelsEnvOther <- eapply(IVModelsEnv, function (x) {
      tmp <- update(x, . ~ . - race - blackPostBrown - MSDuringRepeal - SCDuringRepeal | . - race - blackPostBrown - MSDuringRepeal - SCDuringRepeal)
      attributes(tmp)$modNum <- attributes(x)$modNum
      tmp
    })  
  IVModelsEnvWhites <- as.environment(IVModelsEnvOther)
}

if (OTHER_PANELS == 'HSGRAD') {
  IVModelsEnvOther <- eapply(IVModelsEnv, function (x) {
      tmp <- update(x, . ~ . - educ + HSgrad | . )
      attributes(tmp)$modNum <- attributes(x)$modNum
      tmp
    })  
  IVModelsEnvHSgrad <- as.environment(IVModelsEnvOther)
}

if (OTHER_PANELS == 'SOUTH') {
  IVModelsEnvNonSouth <- eapply(IVModelsEnv, function (x) {
      tmp <- update(x, . ~ . - blackPostBrown - MSDuringRepeal - SCDuringRepeal | . - blackPostBrown - MSDuringRepeal - SCDuringRepeal)
      attributes(tmp)$modNum <- attributes(x)$modNum
      tmp
    }) %>%
  as.environment()
}



# GET OLS MODELS
LMModelsEnv <- eapply(IVModelsEnv, getLMFormula)
LMModelsEnv <- addModNumAttribute(LMModelsEnv)



##############################################################################
# SET UP IMPUTED DATA FRAMES 
##############################################################################
if (OTHER_PANELS == 'IMPUTED') {
  ANESGuarantee <- readRDS('data/ANESImputedDatasetGuarantee_coded.RDS') 
  ANESHealth    <- readRDS('data/ANESImputedDatasetHealth_coded.RDS')
  GSSEqwlth     <- readRDS('data/GSSImputedDatasetEqwlth_coded.RDS')
  GSSGoveqinc   <- readRDS('data/GSSImputedDatasetGoveqinc_coded.RDS')
  GSSHelppoor   <- readRDS('data/GSSImputedDatasetHelppoor_coded.RDS')
  GSSWelfare    <- readRDS('data/GSSImputedDatasetWelfare_coded.RDS')
    
  
  # MAKE DATA FRAMES THAT WILL BE USED FOR ESTIMATION  
  ANESGuarantee.dfList <- list()
  for (impNum in 1:length(ANESGuarantee)) {
    ANESGuarantee.dfList[[impNum]] <- makeIVEstimationDataFrame(
      var.envir     = emptyenv(), 
      model.envir   = IVModelsEnv,
      df            = ANESGuarantee[[impNum]],
      otherVarNames = c('guarantee.7pt', 'race', 'yearInt')) 
  }
  
  ANESHealth.dfList <- list()
  for (impNum in 1:length(ANESHealth)) {
    ANESHealth.dfList[[impNum]] <- makeIVEstimationDataFrame(
      var.envir     = emptyenv(), 
      model.envir   = IVModelsEnv,
      df            = ANESHealth[[impNum]],
      otherVarNames = c('govt.health.7pt', 'race', 'yearInt'))}
  
  GSSGoveqinc.dfList <- list()
  for (impNum in 1:length(GSSGoveqinc)) {
    GSSGoveqinc.dfList[[impNum]] <- makeIVEstimationDataFrame(
      var.envir     = emptyenv(), 
      model.envir   = IVModelsEnv,
      df            = GSSGoveqinc[[impNum]],
      otherVarNames = c('goveqinc', 'race', 'yearInt'))
  }
  
  GSSEqwlth.dfList <- list()
  for (impNum in 1:length(GSSEqwlth)) {
    GSSEqwlth.dfList[[impNum]] <- makeIVEstimationDataFrame(
      var.envir     = emptyenv(), 
      model.envir   = IVModelsEnv,
      df            = GSSEqwlth[[impNum]],
      otherVarNames = c('eqwlth', 'race', 'yearInt'))
  }
  
  GSSHelppoor.dfList <- list()
  for (impNum in 1:length(GSSHelppoor)) {
    GSSHelppoor.dfList[[impNum]] <- makeIVEstimationDataFrame(
      var.envir     = emptyenv(), 
      model.envir   = IVModelsEnv,
      df            = GSSHelppoor[[impNum]],
      otherVarNames = c('helppoor', 'race', 'yearInt'))
  }
  
  GSSWelfare.dfList <- list()
  for (impNum in 1:length(GSSWelfare)) {
    GSSWelfare.dfList[[impNum]] <- makeIVEstimationDataFrame(
      var.envir     = emptyenv(), 
      model.envir   = IVModelsEnv,
      df            = GSSWelfare[[impNum]],
      otherVarNames = c('welfare', 'race', 'yearInt'))
  }  
}



##############################################################################
# SET UP SOUTH-VS.-NONSOUTH DATA FRAMES 
##############################################################################
if (OTHER_PANELS %IN% qw('SOUTH WHITE_SOUTHERNERS')) {
  southernStates_Census <- qw("TX OK AR LA MS AL FL GA SC NC TN KY WV VA DC MD DE")
  ANES.df.South    <- subset(ANES.df,  stateYoung %in% southernStates_Census)
  ANES.df.nonSouth <- subset(ANES.df, !stateYoung %in% southernStates_Census)
  GSS.df.South     <- subset(GSS.df,   stateYoung %in% southernStates_Census)
  GSS.df.nonSouth  <- subset(GSS.df,  !stateYoung %in% southernStates_Census)
  ANES.df.South$stateYoung    <- droplevels(ANES.df.South$stateYoung)
  ANES.df.nonSouth$stateYoung <- droplevels(ANES.df.nonSouth$stateYoung)
  GSS.df.South$stateYoung     <- droplevels(GSS.df.South$stateYoung)
  GSS.df.nonSouth$stateYoung  <- droplevels(GSS.df.nonSouth$stateYoung)
  dfNamesSouth     <- paste0(dfNames, '.South') 
  dfNamesNonSouth  <- paste0(dfNames, '.nonSouth')
}



##############################################################################
# PRELIMINARIES FOR DRAWING THE FIGURE
##############################################################################
panelWidth       <- list(1.00, "inches")       # width 1.09" allows 4 columns on 8.5" paper
panelHeight      <- list(1.04, "inches")
stripHeight      <- list(lines = 1.33, lineheight = 14) # lineheight is space between lines
panelBorderWidth <- .3
panelLayout      <- c(6, 2)  # columns, then rows
nPanels          <- panelLayout[1] * panelLayout[2] 
xBetween         <- 0.8000   # space between columns
yBetween         <- 4.25     # space between rows
baseCexSize      <- 1.00     # cex
stripTextSize    <-  .700 * baseCexSize 
xLabSize         <- 1.000 * baseCexSize 
yLabSize         <- 1.000 * baseCexSize 
xAxisTextSize    <-  .670 * baseCexSize 
yAxisTextSize    <-  .710 * baseCexSize
axisTickSize     <- .4
if (WHITES_ONLY) {
  xlistLimits <- split(
    x = c(rep(c(0, 50), 6), rep(c(-1.2, 1.2), 6)), 
    f = rep(1:nPanels, each = 2))
  xlistLabels <- split(
    x = c(rep(c('10', '25', '40'),         6), 
          rep(c('-.9', '-.3', '.3', '.9'), 6)), 
    f = c(rep(1:6,  each = 3), 
          rep(7:12, each = 4)))    
} else if (STANDARDIZE) {
  xlistLimits <- split(
    x = c(rep(c(0, 50), 6), rep(c(-.42, .42), 6)), 
    f = rep(1:nPanels, each = 2))
  xlistLabels <- split(
    x = c(rep(c('10', '25', '40'),         6), 
          rep(c('-.3', '-.1', '.1', '.3'), 6)), 
    f = c(rep(1:6,  each = 3), 
          rep(7:12, each = 4)))
} else if (OTHER_PANELS == 'HSGRAD') {
  xlistLimits <- split(
    x = c(rep(c(-.65, 1.35), 6), rep(c(-.15, .25), 6)), 
    f = rep(1:nPanels, each = 2))
  xlistLabels <- split(
    x = c(rep(c('-.4', '.1', '.6', '1.1'), 6), 
          rep(c('-.1', '0', '.1', '.2'),   6)), 
    f = c(rep(1:6,  each = 4), 
          rep(7:12, each = 4)))
} else if (OTHER_PANELS == 'IMPUTED') {
  xlistLimits <- split(
    x = rep(c(-.15, .25), 12), 
    f = rep(1:nPanels, each = 2))
  xlistLabels <- split(
    x = rep(c('-.1', '0', '.1', '.2'), 12), 
    f = rep(1:12, each = 4))  
} else if (OTHER_PANELS == 'OLS') {
  xlistLimits <- split(
    x = c(rep(c(-.0075, .0375), 6), rep(c(-.2, .5), 6)), 
    f = rep(1:nPanels, each = 2))
  xlistLabels <- split(
    x = c(rep(qw("0 .01 .02 .03"), 6), rep(qw("-.15 .05 .25 .45"), 6)), 
    f = rep(1:12, each = 4))  
} else if (OTHER_PANELS %IN% qw('SOUTH WHITE_SOUTHERNERS')) {
  xlistLimits <- split(
    x = rep(c(-.775, .775), 12), 
    f = rep(1:nPanels, each = 2))
  xlistLabels <- split(
    x = rep(qw("-.6 -.2 .2 .6"), 12), 
    f = rep(1:12, each = 4))  
} else if (OTHER_PANELS %in% qw("2SLS_OVER_34 WHITES_ONLY")) {    
  xlistLimits <- split(
    x = c(
      rep(c(-.30, .50), 6),
      rep(c(-.15, .40), 6)),
    f = rep(1:nPanels, each = 2))
  xlistLabels <- split(
    x = c(rep(qw('-.2   0   .2 .4'),  6), 
          rep(qw('-.1  .05  .2 .35'), 6)), 
    f = c(rep(1:6,  each = 4), 
          rep(7:12, each = 4)))
} 

xlistAt <- lapply(xlistLabels, as.numeric)
xlist <- list(
  draw        = TRUE,  # if ybetween is too small, will only draw for bottom panels 
  limits      = xlistLimits,
  labels      = xlistLabels,  # xlistLabels,
  at          = xlistAt,  # xlistAt,
  tck         = c(axisTickSize, 0), 
  col         = panelBorderCol, 
  cex         = xAxisTextSize,
  alternating = 1, 
  relation    = 'free', 
  axs         = 'i')  # axs='i' means that there is no padding around the xlimits
ylist <- list(
  draw        = TRUE,
  limits      = c(.5, length(IVModelsEnv)+.6),
  labels      = c(
    "Baseline model (Eq. 2)",
    "\U2026with cohort\U00ADyear fixed effects",
    "\U2026with state\U00ADwhen\U00ADyoung \U00D7 year\U00ADwhen\U00ADyoung trends",
    "\U2026with political and demographic controls"),
  at          = length(IVModelsEnv):1,
  tck         = 0,
  col         = panelBorderCol,
  cex         = yAxisTextSize,
  alternating = c(1, 1),
  relation    = 'same',  
  axs         = 'i')



##############################################################################
# STANDARDIZE OUTCOMES 
##############################################################################
if (STANDARDIZE) {
  for (i in 1:length(varNames)) {
    myDF <- get(dfNames[i])
    myDF[, varNames[i]] <- scale(myDF[, varNames[i]])
    assign(dfNames[i], myDF)
  }
  rm(i, myDF)
}



##############################################################################
# ESTIMATE MODELS WITH IMPUTED DATA
##############################################################################
if (OTHER_PANELS == 'IMPUTED') {
  dfListNamesEduc           <- qw(
    "GSSEqwlth.dfList  GSSGoveqinc.dfList ANESGuarantee.dfList
     ANESHealth.dfList GSSHelppoor.dfList GSSWelfare.dfList")

  IVModelObjectsEnvListEduc  <- list()
  IVClustersForImputedModels <- list()
  IVModelObjectsDataframes   <- list() 
  
  for (impNum in 1:length(get(dfListNamesEduc[1]))) {  #     

    cat(paste0('Estimating models with imputed dataset ', impNum, '...\n'))
    
    # Set up the data frames for this particular round of imputed datasets.
    # The "dfNamesImputed" variable is a character vector that holds six 
    # names, each corresponding to a data frame.  
    dfNamesImputed <- sub('.dfList', '.df', dfListNamesEduc)
    for (namePos in 1:length(dfNamesImputed)) {    
      assign(
        x     = dfNamesImputed[namePos], 
        value = get(dfListNamesEduc[namePos])[[impNum]])
    }
    
    # Estimate the models.  Remember that estimateModels() returns an 
    # environment that contains a series of IV objects.
    IVModelObjectsEnvListEduc[[impNum]] <- estimateModels(
      varNames     = varNames, 
      modelEnvir   = IVModelsEnv,   
      dfNames      = dfNamesImputed, 
      objectSuffix = '.IV') 
  }
  
  # MAKE A PLAIN TABLE OF ESTIMATES 
  for (dfName in dfListNamesEduc) {
    assign(
      x     = sub('.dfList', '.df', dfName),
      value = get(dfName)[[1]])
  }

  # Create the estTable for the first set of imputations. (There are 10 sets 
  # of imputations for each regression.)  
  mainEstsEduc <- estTable(
    varNames       = varNames, 
    treatment      = 'educ', 
    objectEnvir    = IVModelObjectsEnvListEduc[[1]],
    clusterSEs     = TRUE)
  
  # Work on the remaining sets of imputations.
  for (i in 2:length(IVModelObjectsEnvListEduc)) {
    for (dfName in dfListNamesEduc) {
      assign(
        x     = sub('.dfList', '.df', dfName),
        value = get(dfName)[[i]])
    }
    mainEstsEduc <- abind(
      mainEstsEduc, 
      estTable(
        varNames       = varNames, 
        treatment      = 'educ', 
        objectEnvir    = IVModelObjectsEnvListEduc[[i]],
        clusterSEs     = TRUE),
      along = 3)
  }
  mainEstsEducFinal <- matrix(
    data = NA, 
    nrow = nrow(mainEstsEduc[, , 1]), 
    ncol = ncol(mainEstsEduc[, , 1]))
  for (col in seq(1, ncol(mainEstsEducFinal), by = 2)) {
    for (row in 1:nrow(mainEstsEducFinal)) {
      mainEstsEducFinal[row, c(col, col + 1)] <- unlist(
        mi.meld(
          matrix(mainEstsEduc[row, col, ]), 
          matrix(mainEstsEduc[row, col+1, ])))
    }
  }
 mainEstsEducColnames <- rep(varNames, each = 2)
 mainEstsEducColnames[seq(2, 12, by = 2)] <- paste0(varNames, '.se')
 colnames(mainEstsEducFinal) <- mainEstsEducColnames
}



##############################################################################
# ESTIMATE MODELS
##############################################################################
# IVModelObjectsEnv is the "main" or "reference" set of models.  It will be 
# compared to, for example, the set of models that are estimated for white 
# southerners alone.  The code to estimate those other models appears later in
# this section.  
IVModelObjectsEnv <- estimateModels(
  varNames     = varNames, 
  modelEnvir   = if (WHITES_ONLY) {
      IVModelsEnvWhites 
    } else if (OTHER_PANELS == 'SOUTH') {
      IVModelsEnvNonSouth
    } else {
       IVModelsEnv
    },
  dfNames      = if (OTHER_PANELS=='SOUTH') dfNamesNonSouth else dfNames,
  subset       = if (WHITES_ONLY) "race %in% c('white', 'WHITE')",
  objectSuffix = '.IV') 
IVClusters <- lapply(IVModelObjectsEnv, getClusters)

if (OTHER_PANELS == 'OLS') {
  LMModelObjectsEnv <- estimateModels(
    varNames     = varNames, 
    modelEnvir   = as.environment(LMModelsEnv), 
    dfNames      = dfNames, 
    subset       = if (WHITES_ONLY) "race %in% c('white', 'WHITE')",
    objectSuffix = '.lm',
    estFun       = lm)
  LMModelObjectsList   <- as.list(LMModelObjectsEnv)
  LMModelClusters      <- lapply(LMModelObjectsList, getClusters)
  LMModelVcovClustered <- mapply(
    FUN     = cluster.vcov,
    model   = LMModelObjectsList,
    cluster = LMModelClusters)
} else if (OTHER_PANELS == '2SLS_OVER_34') {
  ANES.dfOver34 <- ANES.df[ANES.df$age >= 34, ]
  GSS.dfOver34  <- GSS.df [GSS.df$age  >= 34, ]
  dfNamesOver34 <- paste0(dfNames, 'Over34')
  IVModelObjectsEnvOver34 <- estimateModels(
    varNames     = varNames, 
    modelEnvir   = IVModelsEnvOver34, 
    dfNames      = dfNamesOver34,
    subset       = if (WHITES_ONLY) "race %in% c('white', 'WHITE')",
    objectSuffix = '.IV')
} else if (OTHER_PANELS == 'HSGRAD') {
  IVModelObjectsEnvHSgrad <- estimateModels(
    varNames     = varNames, 
    modelEnvir   = IVModelsEnvHSgrad, 
    dfNames      = dfNames,
    subset       = if (WHITES_ONLY) "race %in% c('white', 'WHITE')",
    objectSuffix = '.IV')    
} else if (OTHER_PANELS == 'SOUTH') {
  IVModelObjectsEnvSouth <- estimateModels(
    varNames     = varNames, 
    modelEnvir   = IVModelsEnv, 
    dfNames      = dfNamesSouth,
    subset       = if (WHITES_ONLY) "race %in% c('white', 'WHITE')",
    objectSuffix = '.IV')
} else if (OTHER_PANELS == 'WHITES_ONLY') {
  IVModelObjectsEnvWhitesOnly <- estimateModels(
    varNames     = varNames, 
    modelEnvir   = IVModelsEnvWhites, 
    dfNames      = dfNames,
    subset       = "race %in% c('white', 'WHITE')",
    objectSuffix = '.IV')       
} else if (OTHER_PANELS == 'WHITE_SOUTHERNERS') {
  IVModelObjectsEnvWhiteSoutherners <- estimateModels(
    varNames     = varNames, 
    modelEnvir   = IVModelsEnvWhites, 
    dfNames      = dfNamesSouth,
    subset       = "race %in% c('white', 'WHITE')",
    objectSuffix = '.IV')
}



##############################################################################
# CREATE DATA FRAME WITH DATA TO PLOT
##############################################################################
dataToPlot <- expand.grid(
  lower     = NA, 
  pred      = NA, 
  upper     = NA,
  estimator = c(OTHER_PANELS, '2SLS'),
  model     = 1:length(IVModelsEnv),
  depVar    = varNames)

for (varName in varNames) {
  for (modNum in unique(dataToPlot$model)) {

    # Get and record normal 2SLS estimates and CIs
    tmpEduc         <- get(paste0(varName, '.IV', modNum), IVModelObjectsEnv)
    tmpEducClusters <- get(paste0(varName, '.IV', modNum), IVClusters)
    coefEstEduc <- coeftest(tmpEduc)['educ',     'Estimate']
    coefSEEduc  <- cluster.robust.se(tmpEduc, tmpEducClusters)['educ',     'Std. Error']   
    dataToPlot[dataToPlot$estimator == '2SLS' & dataToPlot$model == modNum & dataToPlot$depVar == varName, c("lower", "pred", "upper")] <- c(
      coefEstEduc - 1.96*coefSEEduc,
      coefEstEduc,
      coefEstEduc + 1.96*coefSEEduc)
    
    # Get age-restricted 2SLS estimates and CIs
    if (OTHER_PANELS == '2SLS_OVER_34') {       
      tmpEduc         <- get(paste0(varName, '.IV', modNum), IVModelObjectsEnvOver34)
      tmpEducClusters <- getClusters(tmpEduc)
      coefEstEduc     <- cluster.robust.se(tmpEduc, tmpEducClusters)['educ', 'Estimate']
      coefSEEduc      <- cluster.robust.se(tmpEduc, tmpEducClusters)['educ', 'Std. Error']
    } 
    
    # Get HSgrad estimates and CIs
    if (OTHER_PANELS == 'HSGRAD') {
      tmpEduc     <- get(paste0(varName, '.IV', modNum), IVModelObjectsEnvHSgrad)
      tmpEducClusters <- getClusters(tmpEduc) 
      coefEstEduc <- cluster.robust.se(tmpEduc, tmpEducClusters)['HSgradTRUE', 'Estimate']
      coefSEEduc  <- cluster.robust.se(tmpEduc, tmpEducClusters)['HSgradTRUE', 'Std. Error']
    }

    # Get missing-data-imputed estimates and CIs. Clustering for these 
    # estimates was done above.
    if (OTHER_PANELS == 'IMPUTED') {       
      coefEstEduc <- mainEstsEducFinal[modNum, varName]
      coefSEEduc  <- mainEstsEducFinal[modNum, paste0(varName, '.se')]
    } 
    
    # Get OLS estimates and CIs
    if (OTHER_PANELS == 'OLS') {
      tmpEduc     <- get(paste0(varName, '.lm', modNum), LMModelObjectsEnv)
      tmpEducVcov <- get(paste0(varName, '.lm', modNum), LMModelVcovClustered)
      coefEstEduc <- coeftest(tmpEduc)['educ',     'Estimate']
      coefSEEduc  <- coeftest(tmpEduc, vcov. = tmpEducVcov)['educ', 'Std. Error']
    }
    
    # Get estimates and CIs for subjects who grew up in the South
    if (OTHER_PANELS == 'SOUTH') {
      tmpEduc         <- get(paste0(varName, '.IV', modNum), IVModelObjectsEnvSouth)
      tmpEducClusters <- getClusters(tmpEduc) 
      coefEstEduc     <- cluster.robust.se(tmpEduc, tmpEducClusters)['educ', 'Estimate']
      coefSEEduc      <- cluster.robust.se(tmpEduc, tmpEducClusters)['educ', 'Std. Error']
    }

    # Get estimates and CIs for -white- subjects who grew up in the South
    if (OTHER_PANELS == 'WHITE_SOUTHERNERS') {
      tmpEduc         <- get(paste0(varName, '.IV', modNum), IVModelObjectsEnvWhiteSoutherners)
      tmpEducClusters <- getClusters(tmpEduc) 
      coefEstEduc     <- cluster.robust.se(tmpEduc, tmpEducClusters)['educ', 'Estimate']
      coefSEEduc      <- cluster.robust.se(tmpEduc, tmpEducClusters)['educ', 'Std. Error']
    }
    
    # Get whites-only estimates and CIs
    if (OTHER_PANELS == 'WHITES_ONLY') {
      tmpEduc         <- get(paste0(varName, '.IV', modNum), IVModelObjectsEnvWhitesOnly)
      tmpEducClusters <- getClusters(tmpEduc) 
      coefEstEduc     <- cluster.robust.se(tmpEduc, tmpEducClusters)['educ', 'Estimate']
      coefSEEduc      <- cluster.robust.se(tmpEduc, tmpEducClusters)['educ', 'Std. Error']
    }
          
    # Record the "other panel" estimates and CIs
    dataToPlot[dataToPlot$estimator == OTHER_PANELS & dataToPlot$model == modNum & dataToPlot$depVar == varName, c("lower", "pred", "upper")] <- c(
      coefEstEduc - 1.96*coefSEEduc,
      coefEstEduc,
      coefEstEduc + 1.96*coefSEEduc)        
    
  }
} 


# Order the panels according to the order of varNames, which is the order that
# I use for the tables in the paper.  
for (i in 1:length(varNames)) {
  dataToPlot$panelNum[grepl(varNames[i], dataToPlot$depVar)] <- i
}

# Add row numbers for data in each panel.  
dataToPlot$rowNum <- length(IVModelsEnv)+1 - dataToPlot$model



##############################################################################
# FIND MINIMUM AND MAXIMUM SAMPLE SIZES
##############################################################################
if (OTHER_PANELS == 'IMPUTED') {
  tmp <- IVModelObjectsEnvListEduc[[1]]
}
objectEnvirList <- list(
  '2SLS_OVER_34'       = 'IVModelObjectsEnvOver34',
  'HSGRAD'             = 'IVModelObjectsEnvHSgrad',
  'IMPUTED'            = 'tmp',
  'OLS'                = 'LMModelObjectsEnv',
  'SOUTH'              = 'IVModelObjectsEnvSouth',
  'WHITES_ONLY'        = 'IVModelObjectsEnvWhitesOnly',
  'WHITE_SOUTHERNERS'  = 'IVModelObjectsEnvWhiteSoutherners')
nMin <- apply(
  X = getNobsForMatrixOfModels(
    varNames, 
    objectEnvir  = get(objectEnvirList[[OTHER_PANELS]]),
    objectSuffix = if (OTHER_PANELS=='OLS') '.lm' else '.IV'), 
  MARGIN = 2, 
  FUN    = min)
nMax <- apply(
  X = getNobsForMatrixOfModels(
    varNames, 
    objectEnvir  = get(objectEnvirList[[OTHER_PANELS]]),
    objectSuffix = if (OTHER_PANELS=='OLS') '.lm' else '.IV'), 
  MARGIN = 2, 
  FUN    = max)
nMin
nMax

if (OTHER_PANELS == 'IMPUTED') {
  numCasesWithImputation    <- getNobsForMatrixOfModels(varNames, objectEnvir = get(objectEnvirList[[OTHER_PANELS]]))
  numCasesWithoutImputation <- getNobsForMatrixOfModels(varNames, objectEnvir = IVModelObjectsEnv)
  numCasesDiff              <- numCasesWithImputation - numCasesWithoutImputation
  if (any(numCasesDiff < 0)) stop("Objects with imputation have fewer observations than objects with imputation.")
}

if (OTHER_PANELS == 'SOUTH') {
  nMax_nonSouth <- apply(
    X = getNobsForMatrixOfModels(
      varNames, 
      objectEnvir  = IVModelObjectsEnv,
      objectSuffix = '.IV'), 
    MARGIN = 2, 
    FUN    = max)
  cat(paste0('Minimum number of respondents in the Southerner-only panels is  ', min(nMin), ".\n"))
  cat(paste0('Maximum number of respondents in the non-Southerner  panels is ',  max(nMax_nonSouth), "."))
}



##############################################################################
# STRIP FUNCTION
##############################################################################
horizStrip <- function(...) {
  stripTextDouble <- c(stripText, stripText)
  lpolygon(
    x        = c(0,1,1,0,0), 
    y        = c(0,0,1,1,0), 
    col      = stripBackgroundCol, 
    border   = stripBorderCol,
    linejoin = 'mitre',  # for square corners
    lwd      = panelBorderWidth)
  ltext(
    x          = .5, 
    y          = .5, 
    labels     = stripTextDouble[panel.number()], 
    font       = 2, 
    cex        = stripTextSize, 
    lineheight = 1)  # .4 for one-line labels         
}  



##############################################################################
# PANEL FUNCTION
##############################################################################
myPanel <- function(...) {
  panel.abline(v = 0,     col = dotplotLineColor)
  panel.Dotplot(...)
  
  # Put tick marks at top of each panel
  panel.axis(
    side        = "top",
    at          = if (panel.number() <= 6) xlist$at[[1]] else xlist$at[[7]],
    draw.labels = FALSE,
    half        = FALSE,
    tck         = axisTickSize * .75)
}



##############################################################################
# MAKE THE PDF FILE
##############################################################################
pdf(
  file   = paste0(dirOutput, filenameStem, '.pdf'), 
  width  = PS_width, 
  height = PS_height, 
  paper  = "special", 
  title  = PDF_title,
  bg     = postscriptBackground)
trellis.par.set(
  axis.components = list(left = list(pad1 = .5)),  # distance between axis ticks and tick labels
  axis.line = list(
    alpha = 1, 
    col   = panelBorderCol,
    lty   = 1, 
    lwd   = panelBorderWidth),  
  dot.line = list(alpha=1, col=dotplotLineColor, lty=1, lwd=1),
  dot.symbol = list(
    alpha = 1, 
    cex   = .5, 
    col   = dotColor, 
    font  = 1, 
    pch   = 16),           
  plot.line = list(alpha = 1, col = dotColor, lty = 1, lwd = 1),
  superpose.line = list(
    alpha = c(1,1), 
    col   = c('black', plotLineColor), 
    lty   = c("solid", "43"), 
    lwd   = c(1, plotLineWidth)))

mainResultsDotplot <- Dotplot(
  rowNum ~ Cbind(pred, lower, upper) | panelNum * estimator, 
  data           = dataToPlot,
  panel          = myPanel,
  layout         = panelLayout,
  as.table       = TSLS_ESTIMATES_ON_BOTTOM,
  between        = list(x = xBetween, y = yBetween),                   
  strip          = horizStrip,
  par.strip.text = stripHeight,
  scales         = list(x = xlist, y = ylist),
  xlab           = '',
  ylab           = '',
  par.settings   = list(clip = list(panel="on", strip="on") ))
print(
  mainResultsDotplot, 
  panel.width  = panelWidth, 
  panel.height = panelHeight)

# ADD LABELS BELOW EACH ROW
label2SLS <- 'Marginal effects of a year of education on attitudes (2SLS estimates)'
if (STANDARDIZE) { 
  label2SLS <- sub('attitudes', 'standardized attitudes', label2SLS) 
}
if (OTHER_PANELS %in% qw('2SLS_OVER_34 WHITES_ONLY WHITE_SOUTHERNERS')) {
  label2SLS <- sub('2SLS estimates', 'all respondents', label2SLS)  
} else if (OTHER_PANELS %in% c('HSGRAD')) {
  label2SLS <- sub(' \\(2SLS estimates\\)', '', label2SLS)  
} else if (OTHER_PANELS %in% c('IMPUTED')) {
  label2SLS <- sub(
    pattern     = '(2SLS estimates)', 
    replacement = '(ordinary 2SLS estimates)', 
    x           = label2SLS, 
    fixed       = TRUE)  
} else if (OTHER_PANELS %in% c('SOUTH')) {
  label2SLS <- sub(
    pattern     = '(2SLS estimates)', 
    replacement = '(subjects who grew up outside the South)', 
    x           = label2SLS, 
    fixed       = TRUE)  
}
otherLabel <- list(
  '2SLS_OVER_34'      = sub('all respondents',     'respondents 34 or older', label2SLS),
  'HSGRAD'            = sub('a year of education', 'high school graduation',  label2SLS),
  'IMPUTED'           = paste0('Marginal effects of a year of education on attitudes (imputed\U00AD', 'data 2SLS estimates)'),
  'OLS'               = sub('2SLS',            'OLS',               label2SLS),
  'SOUTH'             = sub('outside',         'in',                label2SLS),
  'WHITES_ONLY'       = sub('all respondents', 'whites only',       label2SLS),
  'WHITE_SOUTHERNERS' = sub('all respondents', 'white southerners', label2SLS))
downViewport("plot_01.figure.vp")
grid.text(
  label = label2SLS,
  y     = if (TSLS_ESTIMATES_ON_BOTTOM) -.04 else .535,
  gp    = gpar(cex = xLabSize)) 
grid.text(
  label = otherLabel[[OTHER_PANELS]],
  y     = if (TSLS_ESTIMATES_ON_BOTTOM) .55 else -.04,
  gp    = gpar(cex = xLabSize)) 


# SAVE AND PROCESS FILE
dev.off()
if (!(Sys.which('pdfcrop')=='' | Sys.which('perl')=='')  |  'pdfcrop' %in% installed.packages()[, 'Package']) {  # if "pdfcrop" is installed 
  system(paste(
    if (Sys.info()['sysname']=='Windows') paste(Sys.getenv('Comspec'), '/c ') else '',
    'pdfcrop', 
    paste0(dirOutput, filenameStem, '.pdf'), 
    paste0(dirOutput, filenameStem, '_crop.pdf')))
  file.remove(paste0(dirOutput, filenameStem, '.pdf'))
  if (interactive() & Sys.info()['sysname']=='Windows') shell.exec(paste0(normalizePath(dirOutput), filenameStem, '_crop.pdf'))
}



##############################################################################
# AUXILIARY RESULTS
##############################################################################
if (interactive()) {

  # COMPARE ORDINARY 2SLS ESTIMATES TO IMPUTED-DATA 2SLS ESTIMATES
  if (OTHER_PANELS == 'IMPUTED') {
    imputed_coef_mean <- mean(unlist(subset(dataToPlot, estimator == 'IMPUTED', pred)))
    TSLS_coef_mean    <- mean(unlist(subset(dataToPlot, estimator == '2SLS',    pred)))
    imputed_coef_mean / TSLS_coef_mean
    
    imputed_SE_mean <- meanNA(dataToPlot$upper[dataToPlot$estimator=='IMPUTED'] - dataToPlot$lower[dataToPlot$estimator=='IMPUTED']) / 3.92
    TSLS_SE_mean    <- meanNA(dataToPlot$upper[dataToPlot$estimator=='2SLS']    - dataToPlot$lower[dataToPlot$estimator=='2SLS'])    / 3.92
    imputed_SE_mean / TSLS_SE_mean  
  
    # p-values for normal estimates
    mainEstsEducNormal <- estTable(
      varNames       = varNames, 
      treatment      = 'educ', 
      objectEnvir    = IVModelObjectsEnv,
      clusterSEs     = TRUE)
    mainEstsEducNormal.t <- mainEstsEducNormal[, seq(1, ncol(mainEstsEducNormal), by = 2)] / 
      mainEstsEducNormal[, seq(2, ncol(mainEstsEducNormal), by = 2)]  
    round(2*(1-pnorm(mainEstsEducNormal.t)), 4)  
    
    # p-values for imputed-data estimates
    getNobsForMatrixOfModels(varNames, objectEnvir = IVModelObjectsEnvListEduc[[1]])
    mainEstsEducFinal.t <- mainEstsEducFinal[, seq(1, ncol(mainEstsEducFinal), by = 2)] / 
      mainEstsEducFinal[, seq(2, ncol(mainEstsEducFinal), by = 2)]  
    round(2*(1-pnorm(mainEstsEducFinal.t)), 3)  
    
  }
  
  # AGE-RESTRICTED ESTIMATES VS. NORMAL ESTIMATES
  if (OTHER_PANELS == '2SLS_OVER_34') {
    tmp1 <- subset(dataToPlot, estimator == '2SLS', c('depVar', 'pred'))
    tmp2 <- subset(dataToPlot, estimator == '2SLS_OVER_34', c('depVar', 'pred'))
    diff <- data.frame(depVar = tmp1$depVar, diff = tmp1$pred - tmp2$pred)
    mean(tmp1$pred)
    mean(tmp2$pred)
    mean(diff$diff)
    mean(abs(diff$diff))
  }

}